*******************************************************************************
* Project: Pollution and achievement
*links wind direction to raods and then roads to road monitors.  
* ********************************************************************

*************version control**************************************************



*****************Globals***********************
* change file directories if necessary
	global home "C:\Users\das13016\Dropbox\Research and Referee work\papers\inprocess\Pollution\Research on Florida Wind Patters"

	global data "$home\School wind pollution\School wind road"
	global output "$home\First Stage\output\"
	global samples "$home\First Stage\samples"
	global roaddata "$home\roadsetup\rawdata"

	
clear
set more off
capture log close
log using "$output\FLmontoroad_062018.log", text replace

use "$data\wnpFL2010_5mi_lax_dropmonitor.dta", clear  /* we are loading this in just for the pollution monitor locations */


keep EPAlat EPAlon EPAsite

**JAH: Make EPAlat EPAlon constant - some pollutants rounded differently than others
foreach var of varlist EPAlat EPAlon { 		
	egen temp=mean(`var'), by(EPAsite)	
	replace `var'=temp
	drop temp
	}
	
preserve
use "$roaddata\aadt\mjrrdsjah.dta", replace



***************Create line segments***********************************	
	***Getting prior segment piece's lat/long, if it's from the same roadway	- JAH
foreach var of varlist latitude longitude {
	gen `var'_prior=`var'[_n-1]
	replace `var'_prior=. if `var'==0
	replace `var'_prior=. if `var'_prior==0
	}


***getting segments (will be one less than the number of end points (e.g., if you have 3 points, you have 2 line segments) - JAH	
	geodist latitude longitude latitude_prior longitude_prior, generate(segmentdistance) miles
	label var segmentdistance "Road segment distance in miles"
	sum segmentdistance, d
	*hist segmentdistance if segmentdistance>0.1, freq
	
***AND NOW: DIVIDING UP SEGMENTS >0.1: For ordering purposes:
	gen n=_n

	gen segexp=floor(segmentdistance*100)+1  /**If a segment is longer than 0.01 mi,
	round up to the nearest 10th, multiply by 10, and divide it into that number of segments
	Change X in segmentdistance*X to make the minimum distance 1/X miles */
		
	expand segexp
	sort n
	*get a count by segment:
	by n: generate segord=_n

	**Replacing:
	foreach var of varlist latitude longitude {
		gen `var'_original=`var'
		*take the previous latitude, add on the extra fraction of the distance to get to the next value in the data 
		*(or each segment, so multiple by order 1-2-3...)
		gen dist`var' = (`var'-`var'_prior)/segexp if segexp>1
		replace `var'=`var'_prior+segord*(`var'-`var'_prior)/segexp if segexp>1
		*useful for checking: 
		order `var' `var'_original `var'_prior segexp dist`var' segord segmentdistance n
												}	
*DES: something to think about: precision of storage in stata: I think this is OK though
												
	drop id distlatitude distlongitude
	gen id=_n
	sum id
	order latitude longitude segmentdistance AADT ROADWAY BEGIN_POST END_POST Shape_Leng id


	

	**Chceck 1:  Verify no more long segments:
	***Getting prior segment piece's lat/long, if it's from the same roadway	- JAH
	*foreach var of varlist latitude longitude {
	*	gen `var'_prior_2=`var'[_n-1]
	*	replace `var'_prior_2=. if `var'==0
	*	replace `var'_prior_2=. if `var'_prior==0
	*	}
	***getting segments (will be one less than the number of end points (e.g., if you have 3 points, you have 2 line segments) - JAH	
	*geodist latitude longitude latitude_prior_2 longitude_prior_2, generate(segmentdistance_2) miles
	*	label var segmentdistance "Road segment distance in miles"
	*	sum segmentdistance_2, d
	*	hist segmentdistance_2 if segmentdistance_2>0.1, freq
	***saving this new data as a temporary file:
	
	destring ROADWAY, replace
	
	tempfile majorrd
	save "`majorrd'"	

	*******************now create calculate nearest road to school***************

***Make tempfile that will eventually have the list of roads/matches added to it:
clear
tempfile majorrd_matches
gen epa=.
save "`majorrd_matches'"

***create a dataset just of monitors and identify the number to loop through:
restore

duplicates drop EPAsite, force

levelsof EPAsite


gen sitenum = _n

sum sitenum
tempfile epaloc
save "`epaloc'"

*there are 91 pollution sites

**2. To get the first-fifth nearest road, by school
*forv schoollist=1/5 {  //Ljust do 5 schools for trouble-shooting
forv epalist=1/91 {  //Loop thru for every school individually - COULDN"T FIGURE OUT HOW TO GET A TEMPVAR TO SAY 4289, in case something changes with the file?
	forv i=1/5 {
		if `i'==1 {			// First time around, make a temporary file of roads to use for matching; this will be reduced down later
			use "`majorrd'", clear
			tempfile majorrd_`epalist'
			qui: save "`majorrd_`epalist''"
			display "monitor `epalist' of 91"
			}	
		use "`epaloc'", replace // open up NCES list of schools
		
		
		qui: keep if _n==`epalist' // TO LIMIT TO ONE monitor at a time
		
		qui: geonear sitenum EPAlat EPAlon using "`majorrd_`epalist''", neighbors(id latitude longitude) miles //this is where mi_to_nid is made
		

		*merge road data - must set to local drive
		rename nid id

		qui: merge m:1 id using `majorrd'
		qui: tab _merge

		* DES 6/16/17: there are more road points than monitors so not every pollution monitor
		* data point has a match in the lac_STROUTEs data, should be fine to drop unmatched;

		qui: drop if _merge==2
		drop _merge

		rename (latitude longitude) (road_lat road_lon)
		
		rename id road_id`i'
		foreach var of varlist road_lat road_lon mi_to_nid AADT ROADWAY ROUTE {
			rename `var' `var'`i'
			}

		keep road_lat`i' road_lon`i' EPAlat EPAlon sitenum EPAsite mi_to_nid`i' ROADWAY`i' ROUTE`i' AADT`i'
		
		local roadtodrop=ROADWAY`i'
		append using "`majorrd_matches'" // append to the list of school-to-road matches
		qui: save "`majorrd_matches'", replace
		
		***NOW, delete the ROADWAY we just matched on and loop back over the next closest one:
		use "`majorrd_`epalist''", clear
		qui: drop if ROADWAY==`roadtodrop'
		qui: save "`majorrd_`epalist''", replace
		}
	}

use "`majorrd_matches'", clear
foreach var of varlist ROUTE* {
	forv i=1/4 {
		replace `var'=`var'[_n-`i'] if sitenum==sitenum[_n-`i']&`var'[_n-`i']!=""
		replace `var'=`var'[_n+`i'] if sitenum==sitenum[_n+`i']&`var'[_n+`i']!=""
	}
 }

 
collapse  EPAlat EPAlon mi_to_nid5 road_lat5 road_lon5 AADT5 ROADWAY5 mi_to_nid4 road_lat4 road_lon4 AADT4 ROADWAY4 mi_to_nid3 road_lat3 road_lon3 AADT3 ROADWAY3 mi_to_nid2 road_lat2 road_lon2 AADT2 ROADWAY2 mi_to_nid1 road_lat1 road_lon1 AADT1 ROADWAY1, by( EPAsite sitenum ROUTE*)

**************Now try to calculate initial bearing of pollution monitor to road*******************
* check this against this formula:  http://www.movable-type.co.uk/scripts/latlong.html


	g lat1=EPAlat*_pi/180
	g lon1=EPAlon*_pi/180

forv i=1/5 {
	g lat2`i'=road_lat`i'*_pi/180
	g lon2`i'=road_lon`i'*_pi/180
	g theta`i'=atan2(sin(lon2`i'-lon1)*cos(lat2`i'), cos(lat1)*sin(lat2`i')-sin(lat1)*cos(lat2`i')*cos(lon2`i'-lon1)) 
	gen degrees`i' = (180*theta`i')/_pi
	g angle_degrees`i'=mod((degrees`i'+360), 360)
}

*reality check: this gives us a bearing between 1 and 360
	sum angle_degrees*



save "$samples\EPA_USHandIS.dta", replace

clear

log close
